Code for the study:
Longitudinal IgG antibody responses to Plasmodium vivax blood-stage antigens during and after acute vivax malaria in individuals living in the Brazilian Amazon
Tenzin Tashi, Aditi Upadhye, Prasun Kundu, Chunxiang Wu, Sébastien Menant, Roberta Reis Soares, Marcelo Urbano Ferreira, Rhea J. Longley, Ivo Mueller, Quyen Q. Hoang, Wai-Hong Tham, Julian C. Rayner, Kézia KG Scopel, Josué C. Lima Junior, Tuan M. Tran
medRxiv 2022.08.30.22279402; doi: https://doi.org/10.1101/2022.08.30.22279402
Table 1. Participant characteristics
Fig. 1. Correlations of antigen-specific IgG responses
Fig. 2. Seropositivity at each timepoint by antigen for subjects with data
Fig. S1. Seropositivity by antigen by study site
Fig. 3. Reactivity over time points, faceted by Pv antigen, as ln(max FI) per subject, exponential decay
Fig. 4 Forest Plot comparing Tenshi et al vs Longley et al
Table 2. Half-lives, linear-mixed effects regression, unadjusted analysis
Table S2. Half-lives, linear-mixed effects regression, adjusted analysis
knitr::opts_chunk$set(echo = FALSE)
library(tidyverse)
library(readxl)
library(ggplot2)
library(ggpubr)
library(ggrepel)
library(pals)
library(corrplot)
library(bookdown)
library(googledrive)
datadir <- "/Volumes/GoogleDrive/My Drive/Tran Lab Shared/Projects/Vivax Brazil Immune Responses/PvBrazilAbKinetics/"
resdir <-"/Volumes/GoogleDrive/My Drive/Manuscripts/Brazil Antibody Kinetics Manuscript/Figures and Tables/"
figdir <- "/Volumes/GoogleDrive/My Drive/Manuscripts/Brazil Antibody Kinetics Manuscript/Figures and Tables/"
#read in all data
#from google drive
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
as_id("1QaaBFN4rPDqjO_hhxvjB-VF5lsjYE5AG"), path = temp, overwrite = TRUE)
#alldat <- readRDS(paste0(datadir,"Brazil Pvivax dataframe Aditi 02282022.rds")) %>% # local path
alldat <- readRDS(file = dl$local_path) %>%
dplyr::filter(Dilution == "1:400") %>%
mutate(Antigen = gsub("DBP-FL", "PvDBP-FL", Antigen)) %>%
mutate(Antigen = gsub("MSP3a", "PvMSP3a", Antigen)) %>%
mutate(Antigen = gsub("EBP", "PvEBP", Antigen)) %>%
mutate(Antigen = gsub("CD4", "ratCD4", Antigen)) %>%
mutate(Antigen = factor(Antigen, levels = c("BSA","ratCD4", "Thioredoxin", "Tetanus Toxoid",
"Pv12",
"Pv41",
"PvDBP-FL",
"PvEBP",
"PvGAMA",
"PvMSP3a",
"PvRBP1a",
"PvRBP2b",
"Pvx081550"))) %>%
mutate(Antigen = gsub("Pvx081550", "PVX_081550", Antigen)) %>%
mutate(Antigen = gsub("Tetanus Toxoid", "tetanus toxoid", Antigen)) %>%
mutate(AntigenType = factor(AntigenType, levels = c("background control", "reactivity control", "P. vivax antigen")))
#read in cleaned data
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
as_id("1kOau1oVMp0tvGIPDHMFpAaCMKf4suBJs"), path = temp, overwrite = TRUE)
#alldat_norm_bkgd_rem <- readRDS(paste0(datadir, "alldat_norm_bkgd_rem.rds")) %>% #local path
alldat_norm_bkgd_rem <- readRDS(file = dl$local_path) %>%
mutate(Antigen = gsub("Tetanus Toxoid", "tetanus toxoid", Antigen))
Make table of participant characteristics
#https://www.r-bloggers.com/2021/11/publication-ready-tables-with-flextable-and-own-theme-in-r/
library(tableone)
library(flextable)
library(officer)
source("/Volumes/GoogleDrive/My Drive/Tran Lab Shared/Projects/Vivax Brazil Immune Responses/PvBrazilAbKinetics/customtab.R")
tab_dat <- alldat %>%
dplyr::select(Subject, city, gender, age, "Number of prior malaria episodes", "Years of Amanzonia State") %>%
distinct() %>%
dplyr::rename("Years residence in Amazon basin" = "Years of Amanzonia State",
City = city,
Gender = gender,
"Age, years" = age) %>%
mutate(Gender = factor(Gender, levels = c("F", "M"), labels = c("female", "male"))) %>%
mutate(`Years residence in Amazon basin` = as.numeric(`Years residence in Amazon basin`)) %>%
mutate(`Number of prior malaria episodes`= as.numeric(`Number of prior malaria episodes`)) %>%
filter(!grepl("NAC",Subject)) %>%
filter(!grepl("pos",Subject))
myVars <- c("Gender", "Age, years", "Number of prior malaria episodes", "Years residence in Amazon basin")
catVars <- c("Gender")
nonnormalVars <- c("Age, years", "Number of prior malaria episodes", "Years residence in Amazon basin")
tab1 <- tab_dat %>% CreateTableOne(vars = myVars,
strata = "City",
data = . ,
factorVars = catVars,
addOverall = T,
test = T)
tab1_word <- print(tab1,
quote = F,
noSpaces = T,
test = T,
contDigits = 1,
printToggle = T,
dropEqual = T,
explain = T,
nonnormal = nonnormalVars,
minMax = TRUE)
## Stratified by City
## Overall
## n 47
## Gender (%) 34 (72.3)
## Age, years (median [range]) 32.0 [12.0, 64.0]
## Number of prior malaria episodes (median [range]) 3.0 [0.0, 30.0]
## Years residence in Amazon basin (median [range]) 26.0 [7.0, 64.0]
## Stratified by City
## Acrelândia
## n 10
## Gender (%) 7 (70.0)
## Age, years (median [range]) 31.0 [14.0, 55.0]
## Number of prior malaria episodes (median [range]) 1.0 [0.0, 6.0]
## Years residence in Amazon basin (median [range]) 21.0 [8.0, 43.0]
## Stratified by City
## Califónia
## n 6
## Gender (%) 3 (50.0)
## Age, years (median [range]) 41.0 [29.0, 56.0]
## Number of prior malaria episodes (median [range]) 3.0 [0.0, 15.0]
## Years residence in Amazon basin (median [range]) 33.0 [7.0, 54.0]
## Stratified by City
## Jéssica
## n 19
## Gender (%) 15 (78.9)
## Age, years (median [range]) 30.0 [18.0, 56.0]
## Number of prior malaria episodes (median [range]) 3.0 [0.0, 30.0]
## Years residence in Amazon basin (median [range]) 30.0 [18.0, 54.0]
## Stratified by City
## Plácido p
## n 12
## Gender (%) 9 (75.0) 0.576
## Age, years (median [range]) 23.0 [12.0, 64.0] 0.183
## Number of prior malaria episodes (median [range]) 2.0 [0.0, 10.0] 0.211
## Years residence in Amazon basin (median [range]) 19.0 [7.0, 64.0] 0.033
## Stratified by City
## test
## n
## Gender (%)
## Age, years (median [range]) nonnorm
## Number of prior malaria episodes (median [range]) nonnorm
## Years residence in Amazon basin (median [range]) nonnorm
# Convert to dataframe
tab1_df <- as.data.frame(tab1_word) %>% rownames_to_column(var = "Variable")
# Set Table header
header <- str_squish(str_remove("Table 1. Baseline characteristics of study participants", "\n"))
# Set Table footer
footer <- str_squish(str_remove("Numbers are n. (%) unless otherwise noted. Fisher's exact test used for categorical variables. Kruskal-wallis Rank Sum Test used for continuous variables.", "\n"))
# Set custom_tab() defaults
customtab_defaults()
# Create the flextable object
flextable_1 <- custom_tab(tab1_df, header, footer)
Table 1. Baseline characteristics of study participants | |||||||
Variable | Overall | Acrelândia | Califónia | Jéssica | Plácido | p | test |
n | 47 | 10 | 6 | 19 | 12 | ||
Gender (%) | 34 (72.3) | 7 (70.0) | 3 (50.0) | 15 (78.9) | 9 (75.0) | 0.576 | |
Age, years (median [range]) | 32.0 [12.0, 64.0] | 31.0 [14.0, 55.0] | 41.0 [29.0, 56.0] | 30.0 [18.0, 56.0] | 23.0 [12.0, 64.0] | 0.183 | nonnorm |
Number of prior malaria episodes (median [range]) | 3.0 [0.0, 30.0] | 1.0 [0.0, 6.0] | 3.0 [0.0, 15.0] | 3.0 [0.0, 30.0] | 2.0 [0.0, 10.0] | 0.211 | nonnorm |
Years residence in Amazon basin (median [range]) | 26.0 [7.0, 64.0] | 21.0 [8.0, 43.0] | 33.0 [7.0, 54.0] | 30.0 [18.0, 54.0] | 19.0 [7.0, 64.0] | 0.033 | nonnorm |
Numbers are n. (%) unless otherwise noted. Fisher's exact test used for categorical variables. Kruskal-wallis Rank Sum Test used for continuous variables. | |||||||
Make correlation matrices for each timepoint
#To fix p values and correlation coefficient on the same plot: https://stackoverflow.com/questions/63227830/r-corrplot-plot-correlation-coefficients-along-with-significance-stars
#timepoint 0
timepoint_0_mat <- alldat_norm_bkgd_rem %>%
filter(Group == "Brazilian") %>%
filter(AntigenType == "P. vivax antigen") %>%
filter(Timepoint == 0) %>%
select(c(Subject, Antigen, FI_norm_bkgd_rem)) %>%
pivot_wider(., id_cols = Subject, names_from = Antigen, values_from = FI_norm_bkgd_rem) %>%
column_to_rownames(var = "Subject") %>%
as.matrix() %>%
psych::corr.test(., method = "spearman", adjust = "holm")
#timepoint 30
timepoint_30_mat <- alldat_norm_bkgd_rem %>%
filter(Group == "Brazilian") %>%
filter(AntigenType == "P. vivax antigen") %>%
filter(Timepoint == 30) %>%
select(c(Subject, Antigen, FI_norm_bkgd_rem)) %>%
pivot_wider(., id_cols = Subject, names_from = Antigen, values_from = FI_norm_bkgd_rem) %>%
column_to_rownames(var = "Subject") %>%
as.matrix() %>%
psych::corr.test(., method = "spearman")
#timepoint 60
timepoint_60_mat <- alldat_norm_bkgd_rem %>%
filter(Group == "Brazilian") %>%
filter(AntigenType == "P. vivax antigen") %>%
filter(Timepoint == 60) %>%
select(c(Subject, Antigen, FI_norm_bkgd_rem)) %>%
pivot_wider(., id_cols = Subject, names_from = Antigen, values_from = FI_norm_bkgd_rem) %>%
column_to_rownames(var = "Subject") %>%
as.matrix() %>%
psych::corr.test(., method = "spearman")
#timepoint 180
timepoint_180_mat <- alldat_norm_bkgd_rem %>%
filter(Group == "Brazilian") %>%
filter(AntigenType == "P. vivax antigen") %>%
filter(Timepoint == 180) %>%
select(c(Subject, Antigen, FI_norm_bkgd_rem)) %>%
pivot_wider(., id_cols = Subject, names_from = Antigen, values_from = FI_norm_bkgd_rem) %>%
column_to_rownames(var = "Subject") %>%
as.matrix() %>%
psych::corr.test(., method = "spearman")
Assess number of samples per timepoint
## `summarise()` has grouped output by 'Antigen'. You can override using the
## `.groups` argument.
## # A tibble: 36 × 3
## # Groups: Antigen [9]
## Antigen Timepoint n
## <chr> <dbl> <int>
## 1 Pv12 0 44
## 2 Pv12 30 40
## 3 Pv12 60 22
## 4 Pv12 180 21
## 5 Pv41 0 44
## 6 Pv41 30 40
## 7 Pv41 60 22
## 8 Pv41 180 21
## 9 PvDBP-FL 0 44
## 10 PvDBP-FL 30 40
## # … with 26 more rows
Plot correlation heatmaps by timepoint
day 0
corrplot::corrplot(timepoint_0_mat$r,
method = "square",
order = "hclust",
hclust.method = "ward.D2",
type = "upper",
p.mat = timepoint_0_mat$p,
insig = "label_sig",
sig.level = c(.001, .01, .05),
pch.cex = 1,
pch.col = "white",
tl.col="black",
addCoef.col = "black",
outline=FALSE)
day 30
corrplot::corrplot(timepoint_30_mat$r,
method = "square",
order = "hclust",
hclust.method = "ward.D2",
type = "upper",
p.mat = timepoint_30_mat$p,
insig = "label_sig",
sig.level = c(.001, .01, .05),
pch.cex = 1,
pch.col = "white",
tl.col="black",
addCoef.col = "black",
outline=FALSE)
day 60
corrplot::corrplot(timepoint_60_mat$r,
method = "square",
order = "hclust",
hclust.method = "ward.D2",
type = "upper",
p.mat = timepoint_60_mat$p,
insig = "label_sig",
sig.level = c(.001, .01, .05),
pch.cex = 1,
pch.col = "white",
tl.col="black",
addCoef.col = "black",
outline=FALSE)
day 180
corrplot::corrplot(timepoint_180_mat$r,
method = "square",
order = "hclust",
hclust.method = "ward.D2",
type = "upper",
p.mat = timepoint_180_mat$p,
insig = "label_sig",
sig.level = c(.001, .01, .05),
pch.cex = 1,
pch.col = "white",
tl.col="black",
addCoef.col = "black",
outline=FALSE)
Before preceding further, we want to determine what a seropositive is using the North American controls to set a threshold for positivity (mean + 3 st dev).
We first filter out the North American controls, noting that only all were ran on Plate 1 except for NAC 1, which was run on all plates. For consistency, and being that we already normalized based on the positive control standard, we will only use NACs on Plate 1 for this.
Evaluate seroprevalence (% seropositive) for each timepoint, faceted by antigen
all_sero <- alldat_norm_bkgd_rem %>%
filter(AntigenType == "P. vivax antigen" & Group == "Brazilian") %>%
filter(!is.na(Timepoint)) %>%
filter(!is.na(seropositive)) %>%
group_by(Antigen, Timepoint) %>%
summarize(n = n(), seroprevalence = sum(seropositive)/n()) %>%
mutate(timepoint = factor(Timepoint)) %>%
mutate(timepoint = paste0(timepoint, " (n=", n, ")")) %>%
mutate(timepoint = fct_reorder(timepoint, Timepoint, .desc = FALSE)) %>%
ggplot(., aes(x=timepoint, y = seroprevalence)) +
geom_bar(stat = "identity") +
scale_y_continuous(labels = scales::percent) +
ylab("% seropositive") +
xlab("days from acute vivax malaria") +
geom_hline(yintercept = .50, linetype = "dotted", color="red") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
facet_wrap(~Antigen)
All sites
The data shows that >50% seroprevalence is maintained for Pv41, PvGAMA, and PvRBP2 from acute presentation all the way up to 180 days. Pv12 and PvEBP also demonstrate more prolonged maintenance of seropositivity 180 days from acute vivax malaria.
Evaluate seroprevalence (% seropositive) for each timepoint, faceted by antigen and study site
foo_dat <- alldat_norm_bkgd_rem %>%
left_join(., alldat %>%
dplyr::select(Subject, city) %>%
distinct(Subject, city),
by = "Subject") %>%
filter(AntigenType == "P. vivax antigen" & Group == "Brazilian") %>%
filter(!is.na(Timepoint)) %>%
filter(!is.na(seropositive))
acrelandia <- foo_dat %>%
filter(city == "Acrelândia") %>%
filter(!is.na(city)) %>%
filter(!is.na(Timepoint)) %>%
filter(!is.na(seropositive)) %>%
group_by(Antigen, Timepoint) %>%
summarize(n = n(), seroprevalence = sum(seropositive)/n()) %>%
mutate(timepoint = factor(Timepoint)) %>%
mutate(timepoint = paste0(timepoint, " (n=", n, ")")) %>%
mutate(timepoint = fct_reorder(timepoint, Timepoint, .desc = FALSE)) %>%
ggplot(., aes(x=timepoint, y = seroprevalence)) +
geom_bar(stat = "identity") +
scale_y_continuous(labels = scales::percent) +
ylab("% seropositive") +
xlab("days from acute vivax malaria") +
geom_hline(yintercept = .50, linetype = "dotted", color="red") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
facet_wrap(~Antigen)
california <- foo_dat %>%
filter(city == "Califónia") %>%
filter(!is.na(city)) %>%
filter(!is.na(Timepoint)) %>%
filter(!is.na(seropositive)) %>%
group_by(Antigen, Timepoint) %>%
mutate(city = gsub("Califónia", "Califórnia", city)) %>%
group_by(Antigen, Timepoint) %>%
summarize(n = n(), seroprevalence = sum(seropositive)/n()) %>%
mutate(timepoint = factor(Timepoint)) %>%
mutate(timepoint = paste0(timepoint, " (n=", n, ")")) %>%
mutate(timepoint = fct_reorder(timepoint, Timepoint, .desc = FALSE)) %>%
ggplot(., aes(x=timepoint, y = seroprevalence)) +
geom_bar(stat = "identity") +
scale_y_continuous(labels = scales::percent) +
ylab("% seropositive") +
xlab("days from acute vivax malaria") +
geom_hline(yintercept = .50, linetype = "dotted", color="red") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
facet_wrap(~Antigen)
jessica <- foo_dat %>%
filter(city == "Jéssica") %>%
filter(!is.na(city)) %>%
filter(!is.na(Timepoint)) %>%
filter(!is.na(seropositive)) %>%
group_by(Antigen, Timepoint) %>%
summarize(n = n(), seroprevalence = sum(seropositive)/n()) %>%
mutate(timepoint = factor(Timepoint)) %>%
mutate(timepoint = paste0(timepoint, " (n=", n, ")")) %>%
mutate(timepoint = fct_reorder(timepoint, Timepoint, .desc = FALSE)) %>%
ggplot(., aes(x=timepoint, y = seroprevalence)) +
geom_bar(stat = "identity") +
scale_y_continuous(labels = scales::percent) +
ylab("% seropositive") +
xlab("days from acute vivax malaria") +
geom_hline(yintercept = .50, linetype = "dotted", color="red") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
facet_wrap(~Antigen)
placido <- foo_dat %>%
filter(city == "Plácido") %>%
filter(!is.na(city)) %>%
filter(!is.na(Timepoint)) %>%
filter(!is.na(seropositive)) %>%
group_by(Antigen, Timepoint) %>%
summarize(n = n(), seroprevalence = sum(seropositive)/n()) %>%
mutate(timepoint = factor(Timepoint)) %>%
mutate(timepoint = paste0(timepoint, " (n=", n, ")")) %>%
mutate(timepoint = fct_reorder(timepoint, Timepoint, .desc = FALSE)) %>%
ggplot(., aes(x=timepoint, y = seroprevalence)) +
geom_bar(stat = "identity") +
scale_y_continuous(labels = scales::percent) +
ylab("% seropositive") +
xlab("days from acute vivax malaria") +
geom_hline(yintercept = .50, linetype = "dotted", color="red") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
facet_wrap(~Antigen)
all_sero_plots <- ggarrange(acrelandia, california, jessica, placido,all_sero,
labels = c("Acrelândia", "Califórnia","Jéssica","Plácido", "all sites"), ncol = 2, nrow = 3)
# cairo_pdf(filename = paste0(figdir, "Figure S1 Seroprevalence by Site.pdf"),
# height = 11, width = 8.5)
# all_sero_plots
# dev.off()
We take advantage of the longitudinal data to ask how many people are seropositive at admission and either seroconvert or serorevert over time?
Reduce to subjects who had data at all four time points
mytable <- table("subject" = alldat_norm_bkgd_rem$Subject, "time point" = alldat_norm_bkgd_rem$Timepoint)
#with 0, 30, 60, 180
subjects_with_0_3_60_180 <- as.data.frame.matrix(mytable, optional = TRUE, make.names = TRUE) %>%
rownames_to_column(var = "Subject") %>%
as_tibble() %>%
dplyr::filter(`0` != 0 & `30` != 0 & `60` != 0 & `180` != 0) %>%
.$Subject
#length(unique(subjects_with_0_3_60_180))
alldat_norm_bkgd_rem %>%
filter(Subject %in% subjects_with_0_3_60_180) %>%
filter(AntigenType == "P. vivax antigen" & Group == "Brazilian") %>%
filter(!is.na(Timepoint)) %>%
mutate(timepoint = factor(Timepoint)) %>%
mutate(subject = factor(Subject)) %>%
mutate(reactivity = factor(seropositive, levels = c(0,1), labels = c("seronegative","seropositive"))) %>%
select(subject, Antigen, timepoint, FI_norm_bkgd_rem, reactivity, seropositive) %>%
group_by(Antigen, timepoint) %>%
mutate(subject = fct_reorder(subject, FI_norm_bkgd_rem, .desc = FALSE)) %>%
ungroup() %>%
ggplot(., aes(x=timepoint, y = subject, fill = reactivity)) +
geom_tile(color = "white", size = 0.1) +
scale_fill_manual(values = c("gray80", "black")) +
#scale_fill_viridis(name="reactivity", discrete = TRUE, option = "magma", direction = -1) +
coord_equal() +
facet_wrap(~Antigen, nrow = 1) +
labs(x="days from acute vivax", y="subjects", title = "Seropositivity over time") +
ggthemes::theme_tufte(base_family="Helvetica") +
theme(axis.ticks=element_blank(),
axis.text=element_text(size=12),
panel.border=element_blank(),
plot.title=element_text(hjust=0),
strip.text=element_text(hjust=0),
panel.spacing.x = unit(0.25, "cm"),
panel.spacing.y = unit(0.25, "cm"),
legend.title=element_text(size=8),
legend.title.align=1,
legend.text=element_text(size=8),
legend.position="bottom",
legend.key.size=unit(0.2, "cm"),
legend.key.width=unit(1, "cm"))
Lastly, we can also visualize as percent of max, which also does not need any normalization or background subtraction given these are canceled out. Hence, raw FI is used.
Strategy is to group by subject and antigen and divide raw FI over max(FI) and express as a percentage.
alldat_norm_bkgd_rem_pctmax <- alldat_norm_bkgd_rem %>%
group_by(Plate, Subject, Antigen) %>%
mutate(FI_pct_max = FI_norm_bkgd_rem/max(FI_norm_bkgd_rem, na.rm = TRUE)) %>%
ungroup()
Determine subjects who have data at days 0 and 30 and days 0, 30, 60.
mytable <- table("subject" = alldat_norm_bkgd_rem$Subject, "time point" = alldat_norm_bkgd_rem$Timepoint)
#subjects with at least day 0 and 30 data
subjects_with_0_30 <- as.data.frame.matrix(mytable, optional = TRUE, make.names = TRUE) %>%
rownames_to_column(var = "Subject") %>%
as_tibble() %>%
dplyr::filter(`0` != 0 & `30` != 0) %>%
.$Subject
#length(unique(subjects_with_0_30))
#with 0, 30, 60
subjects_with_0_3_60 <- as.data.frame.matrix(mytable, optional = TRUE, make.names = TRUE) %>%
rownames_to_column(var = "Subject") %>%
as_tibble() %>%
dplyr::filter(`0` != 0 & `30` != 0 & `60` != 0) %>%
.$Subject
#length(unique(subjects_with_0_3_60))
Estimate the curve from day 0 through day 180 as exponential decay. For this, we determine the timepoint with the max FI for each antigen for each subject, and then analyze only values from the max timepoint onward to give the most accurate model of decay. This is because values prior to the max are not needed to determine decay and actually makes the decay model less accurate
You can plot the exponential decay fit using the following: (see https://stackoverflow.com/questions/41101841/exponential-fit-in-ggplot-r). By taking the natural log of the FI, you can then plot as linear decay.
Decay be approximated by as simple linear model. Since we’re using repeated measures, we will opt for a mixed effects model with random intercepts for subjects. Since we have different slopes for each antigen, we will use the do and tidy functions applied to lmer.
Multiple linear regression with ln(reactivity) as the dependent variable and time (days) as the independent variable in separate models unadjusted or adjusted for years living in the malaria-endemic Amazon region. A separate model was run for each antigen. Half-life (t1/2) was calculated as ln(0.5)/estimate and further converted to months by dividing by 30. P values were adjusted for multiple comparisons (antigens) using the Benjamini-Hochberg (BH) procedure.
Requirements 1. Use only data from timepoint with max FI for each subject and antigen onward so as to accurately model decay 2. Select only subjects with at least two datapoints from max timepoint onward
myantigens <- c("Pv41", "PvGAMA", "PvRBP2b", "PVX_081550", "PvDBP-FL", "PvEBP", "Pv12", "PvMSP3a", "tetanus toxoid")
#https://academic.oup.com/abt/article/4/3/144/6309336
#tetanus antibodies: https://www.nejm.org/doi/full/10.1056/NEJMoa066092
library(lmerTest)
halflife_res_mixed <- alldat_norm_bkgd_rem_pctmax %>%
select(c(Subject, Group, AntigenType, Antigen, Timepoint, Years_Endemic, gender, age, FI_norm_bkgd_rem, FI_pct_max)) %>%
filter(Group == "Brazilian") %>% #filter only Brazilian samples
filter(Subject != "pos_control") %>% #remove positive controls
filter(!is.nan(FI_pct_max)) %>% #remove NaNs
filter(Antigen %in% myantigens) %>% #filter on only Pv antigens that are have seroprevalence >50%
left_join(., alldat %>%
filter(Group == "Brazilian") %>% #filter only Brazilian samples
filter(Subject != "pos_control") %>% #remove positive controls
select(Subject, "Number of prior malaria episodes") %>%
rename(Prior_malaria = "Number of prior malaria episodes") %>%
distinct(Subject, .keep_all = TRUE) %>%
mutate(Prior_malaria = as.integer(Prior_malaria)),
by = "Subject") %>%
mutate(Antigen = factor(Antigen, levels = c("Pv41", "PvGAMA", "PvRBP2b", "PVX_081550", "PvDBP-FL", "PvEBP", "Pv12", "PvMSP3a", "tetanus toxoid"))) %>%
mutate(Years_Endemic = as.numeric(Years_Endemic)) %>% #this line is the one that gives the "NAs introduced by coercion" message
mutate(Years_Endemic = ifelse(is.na(Years_Endemic),median(Years_Endemic, na.rm = TRUE),Years_Endemic)) %>% #impute missing variable for 2 subjects
mutate(Prior_malaria = ifelse(is.na(Prior_malaria),median(Prior_malaria, na.rm = TRUE),Prior_malaria)) %>% #impute missing variable for 4 subjects
mutate(ln_FI_pct_max = log(FI_pct_max+0.0001)) %>%
mutate(ln_FI_norm_bkgd_rem = log(FI_norm_bkgd_rem+1)) %>%
arrange(Subject, Antigen, Timepoint) %>%
group_by(Subject, Antigen) %>%
mutate(days_from_max_pct = Timepoint - Timepoint[which.max(FI_norm_bkgd_rem)]) %>%
filter(between(row_number(),which.max(FI_norm_bkgd_rem), n())) %>% #this line removes all values before the max so that we can estimate decay from the highest available value
filter(n()>1) %>% #filter subjects with at least two data points
ungroup()
#Subject 32 did not have tetanus toxoid response, so removed from analysis
halflife_res_mixed <- halflife_res_mixed %>%
filter(!(Subject == "032" & Antigen == "tetanus toxoid"))
Plot exponential decay
#add small prior to FI_pct_max to avoid taking log of zero
exponential_decay_plot_all <- halflife_res_mixed %>%
ggplot(., aes(x=days_from_max_pct, y = FI_pct_max + 0.0001, fill= Antigen, group = Antigen, color = Antigen)) +
geom_smooth(method="glm", formula=y~x, method.args=list(family=gaussian(link="log")), alpha = 0.15, se = TRUE) +
geom_hline(yintercept = c(0.25,0.5,0.75,1), linetype = "dotted", color = "gray") +
scale_y_continuous(labels = scales::percent, breaks = c(0.25,0.5,0.75,1)) +
scale_x_continuous(breaks = c(0,30,60,90,120,150,180)) +
ylab("percent of max antibody reactivity") +
xlab("days after max antibody reactivity") +
theme_bw() +
theme(text = element_text(size=11),
axis.ticks=element_blank(),
axis.text=element_text(size=6.5),
panel.border=element_blank(),
plot.title=element_text(hjust=0),
strip.background = element_blank(),
panel.spacing.x = unit(0.25, "cm"),
panel.spacing.y = unit(0.25, "cm"),
legend.position = "top",
legend.title=element_blank(),
legend.title.align=1,
legend.text=element_text(size=8),
legend.key.size=unit(0.15, "cm"),
legend.key.width=unit(0.15, "cm")) +
scale_color_brewer(palette="Spectral") +
scale_fill_brewer(palette="Spectral")
exponential_decay_plot_all
#refit model
nls_fit <- halflife_res_mixed %>%
nls(formula = FI_pct_max + 0.0001 ~ a*exp(-b *days_from_max_pct),
start = c(a=10, b=0.01), data = .)
#coef(nls_fit)
#add small prior to FI_pct_max to avoid taking log of zero
exponential_decay_plot_all_facet <- halflife_res_mixed %>%
ggplot(., aes(x=days_from_max_pct, y = FI_pct_max + 0.0001, fill= Antigen, group = Antigen, color = Antigen)) +
geom_point(colour="black",pch=21, size=2.5) +
geom_smooth(method="glm", formula=y~x, method.args=list(family=gaussian(link="log")), alpha = 0.15, se = TRUE) +
geom_hline(yintercept = c(0.25,0.5,0.75,1), linetype = "dotted", color = "gray") +
scale_y_continuous(labels = scales::percent, breaks = c(0.25,0.5,0.75,1)) +
scale_x_continuous(breaks = c(0,30,60,90,120,150,180)) +
ylab("percent of max antibody reactivity") +
xlab("days after max antibody reactivity") +
theme_bw() +
theme(text = element_text(size=11),
axis.ticks=element_blank(),
axis.text=element_text(size=6.5),
panel.border=element_blank(),
plot.title=element_text(hjust=0),
strip.background = element_blank(),
panel.spacing.x = unit(0.25, "cm"),
panel.spacing.y = unit(0.25, "cm"),
legend.position = "top",
legend.title=element_blank(),
legend.title.align=1,
legend.text=element_text(size=8),
legend.key.size=unit(0.15, "cm"),
legend.key.width=unit(0.15, "cm")) +
scale_color_brewer(palette="Spectral") +
scale_fill_brewer(palette="Spectral") +
stat_regline_equation(label.y = 1.07, label.x = 100, aes(label = ..adj.rr.label..), color = "gray10", cex = 3) +
theme(legend.position = "none") +
facet_wrap(~Antigen)
exponential_decay_plot_all_facet
halflife_res_mixed <- halflife_res_mixed %>%
left_join(., alldat %>%
dplyr::select(c(Subject, city)) %>%
filter(!duplicated(Subject)) %>%
drop_na(),
by = "Subject")
library(tableone)
library(flextable)
library(officer)
source("/Volumes/GoogleDrive/My Drive/Tran Lab Shared/Projects/Vivax Brazil Immune Responses/PvBrazilAbKinetics/customtab.R")
foo1 <- lmer_unadj_res %>%
dplyr::select(Antigen, estimate, std.error, statistic, halflife_days, halflife_days_lowCI, halflife_days_highCI)
# Rename first variable from n to No.
#tab1_df$Variable[1] <- "No."
# Set Table header
header <- str_squish(str_remove("Table 2. Antibody half-life determination using linear mixed-effects regression of antibody reactivity", "\n"))
# Set Table footer
footer <- str_squish(str_remove("A linear mixed effects regression with ln(reactivity) as the dependent variable and time (days) and subject as fixed and random effects, respectively. Half-life (t1/2) was calculated as ln(0.5)/estimate.", "\n"))
# Set custom_tab() defaults
customtab_defaults()
# Create the flextable object
flextable_lmer_unadj_res <- custom_tab(foo1, header, footer)
Table 2. Antibody half-life determination using linear mixed-effects regression of antibody reactivity | ||||||
Antigen | estimate | std.error | statistic | halflife_days | halflife_days_lowCI | halflife_days_highCI |
tetanus toxoid | -0.0016 | 0.00031 | -5.3 | 430 | 310 | 690 |
PVX_081550 | -0.0067 | 0.00079 | -8.5 | 100 | 83 | 130 |
PvRBP2b | -0.0076 | 0.00072 | -11.0 | 91 | 76 | 110 |
Pv12 | -0.0085 | 0.00120 | -7.3 | 82 | 64 | 110 |
Pv41 | -0.0110 | 0.00100 | -11.0 | 64 | 54 | 79 |
PvEBP | -0.0140 | 0.00190 | -7.0 | 51 | 39 | 71 |
PvDBP-FL | -0.0160 | 0.00230 | -6.9 | 44 | 34 | 63 |
PvMSP3a | -0.0170 | 0.00220 | -7.7 | 41 | 32 | 55 |
PvGAMA | -0.0210 | 0.00180 | -12.0 | 32 | 28 | 39 |
A linear mixed effects regression with ln(reactivity) as the dependent variable and time (days) and subject as fixed and random effects, respectively. Half-life (t1/2) was calculated as ln(0.5)/estimate. | ||||||
library(tableone)
library(flextable)
library(officer)
source("/Volumes/GoogleDrive/My Drive/Tran Lab Shared/Projects/Vivax Brazil Immune Responses/PvBrazilAbKinetics/customtab.R")
foo2 <- lmer_adj_res %>%
dplyr::select(Antigen, estimate, std.error, "BH-adjusted.p.value", significant, halflife_days, halflife_days_lowCI, halflife_days_highCI)
# Rename first variable from n to No.
#tab1_df$Variable[1] <- "No."
# Set Table header
header <- str_squish(str_remove("Table S2. Antibody half-life determination using linear mixed-effects regression of antibody reactivity, adjusted analysis", "\n"))
# Set Table footer
footer <- str_squish(str_remove("A linear mixed effects regression with ln(reactivity) as the dependent variable and time (days), sex, age, city, and years in the malaria-endemic Amazon region as fixed effects and subject as a random effect. Half-life (t1/2) was calculated as ln(0.5)/estimate.", "\n"))
# Set custom_tab() defaults
customtab_defaults()
# Create the flextable object
flextable_lmer_adj_res <- custom_tab(foo2, header, footer)
Table S2. Antibody half-life determination using linear mixed-effects regression of antibody reactivity, adjusted analysis | |||||||
Antigen | estimate | std.error | BH-adjusted.p.value | significant | halflife_days | halflife_days_lowCI | halflife_days_highCI |
tetanus toxoid | -0.0016 | 0.00031 | 9.10e-05 | * | 430 | 310 | 690 |
PVX_081550 | -0.0068 | 0.00079 | 2.70e-10 | * | 100 | 83 | 130 |
PvRBP2b | -0.0076 | 0.00072 | 1.00e-11 | * | 91 | 76 | 110 |
Pv12 | -0.0086 | 0.00120 | 7.20e-08 | * | 81 | 63 | 110 |
Pv41 | -0.0110 | 0.00100 | 1.20e-11 | * | 64 | 54 | 79 |
PvEBP | -0.0140 | 0.00190 | 3.50e-07 | * | 51 | 40 | 72 |
PvDBP-FL | -0.0150 | 0.00230 | 6.70e-07 | * | 45 | 35 | 65 |
PvMSP3a | -0.0170 | 0.00220 | 3.50e-07 | * | 41 | 32 | 56 |
PvGAMA | -0.0210 | 0.00180 | 1.30e-12 | * | 32 | 28 | 39 |
A linear mixed effects regression with ln(reactivity) as the dependent variable and time (days), sex, age, city, and years in the malaria-endemic Amazon region as fixed effects and subject as a random effect. Half-life (t1/2) was calculated as ln(0.5)/estimate. | |||||||
protein_dat <- data.frame(Protein = c("PVX_097720",
"PVX_088910",
"PVX_113775",
"PVX_110810",
"PVX_000995",
"PVP01_0102300",
"PVX_081550",
"PVX_094255",
"PVX_098585"),
Antigen = c("PvMSP3a",
"PvGAMA",
"Pv12",
"PvDBP-FL",
"Pv41",
"PvEBP",
"PVX_081550",
"PvRBP2b",
"PvRBP1a"))
longley_dat <- readRDS(paste0(datadir, "Brazil Thailand PNG AlphaScreen Antibody Response dataframe Aditi 05222022.rds")) %>%
dplyr::rename(Protein = "Pv code") %>%
right_join(., protein_dat, by = "Protein") %>%
filter(Antigen != "PvEBP") %>%
select(Protein, Antigen, everything())
longley_dat_excel <- readxl::read_excel(paste0(datadir, "Table_S1_final_Longley_Mueller.xlsx")) %>%
right_join(., protein_dat, by = "Protein") %>%
filter(!duplicated(Protein)) %>%
arrange(desc(Braz_d_half)) %>%
select(Protein, Antigen, Braz_d_half, Braz_d_half_low, Braz_d_half_high) %>%
rename(halflife_days_Longley = Braz_d_half) %>%
rename(Longley_lowCI = Braz_d_half_low) %>%
rename(Longley_highCI = Braz_d_half_high)
lmer_adj_res_longley <- lmer_adj_res %>%
left_join(., longley_dat_excel,
by = "Antigen") %>%
select(Antigen, Protein, everything()) %>%
rename(PlasmoDB_Accession = Protein)
longley_dat_clean <- longley_dat %>%
filter(site == "Brazil") %>%
#mutate(Antigen = factor(Antigen, levels = c("Pv41", "PvGAMA", "PvRBP2b", "PVX_081550", "PvDBP-FL", "Pv12", "PvMSP3a"))) %>%
rename(Subject = `Subject ID`) %>%
mutate(Week = as.numeric(gsub("w", "", visit))) %>%
mutate(Day = Week*7) %>%
mutate(Month = Week/4) %>%
arrange(Subject, Antigen, Week) %>%
select(Subject, Antigen, visit, Day, Week, Month, response) %>%
group_by(Subject, Antigen, visit, Day, Week, Month) %>%
summarise_at(vars(response), mean, na.rm = TRUE) %>%
filter(Week != 36) %>% # responses at week 36 go up, so only use data from before week 36
ungroup() %>%
group_by(Subject, Antigen) %>%
mutate(days_from_max_pct = Day - Day[which.max(response)]) %>%
filter(between(row_number(),which.max(response), n())) %>% #this line removes all values before the max so that we can estimate decay from the highest available value
filter((response > 0 & Day == 0) | Day > 0) %>% #filter subjects with zero response
filter(n()>1) %>% #filter subjects with at least two data points
ungroup() %>%
mutate(dummy_name = paste(Subject, Antigen, sep="_"))
#remove subjects where all values are zero
longley_dat_keep <- longley_dat_clean %>%
group_by(Subject, Antigen) %>%
summarize(sum = sum(response)) %>%
filter(sum > 0) %>%
ungroup() %>%
mutate(dummy_name = paste(Subject, Antigen, sep="_"))
longley_dat_filtered <- longley_dat_clean %>%
mutate(dummy_name = paste(Subject, Antigen, sep="_")) %>%
filter(dummy_name %in% longley_dat_keep$dummy_name)
lmer_adj_res_longley <- longley_dat_filtered %>%
select(Subject, Antigen, Day, response) %>%
mutate(ln_response = log(response+1)) %>% #add 1 to all values to avoid taking log of zero
group_by(Antigen) %>%
do(broomExtra::tidy(lmerTest::lmer(ln_response~Day + (1|Subject), data = .), conf.int = TRUE)) %>%
ungroup() %>%
mutate(p.value = as.numeric(formatC(p.value, format = "e", digits = 2))) %>%
mutate("BH-adjusted.p.value" = as.numeric(p.adjust(p.value, method = "BH"))) %>%
mutate(halflife_days_longley_recalc = log(0.5)/estimate) %>%
mutate(halflife_days_lowCI_longley_recalc = log(0.5)/conf.low) %>%
mutate(halflife_days_highCI_longley_recalc = log(0.5)/conf.high) %>%
mutate(halflife_months_longley_recalc = halflife_days_longley_recalc/30) %>%
mutate(halflife_years_longley_recalc = halflife_days_longley_recalc/365.25) %>%
mutate(across(where(is.numeric), signif, 2)) %>%
dplyr::filter(term == "Day") %>%
mutate(significant = ifelse(.$"BH-adjusted.p.value"<0.05, "*", "NS")) %>%
mutate("BH-adjusted.p.value" = formatC(.$"BH-adjusted.p.value", format = "e", digits = 2)) %>%
arrange(desc(halflife_days_longley_recalc)) %>%
dplyr::select(Antigen, estimate, std.error, "BH-adjusted.p.value", significant, halflife_days_longley_recalc, halflife_days_lowCI_longley_recalc, halflife_days_highCI_longley_recalc)
lmer_adj_res_longley_short <- lmer_adj_res_longley %>%
dplyr::select(Antigen, contains("halflife"))
lmer_adj_res_longley_recalc <- protein_dat %>%
right_join(., lmer_unadj_res,
by = "Antigen") %>%
left_join(., lmer_adj_res_longley_short,
by = "Antigen") %>%
select(Antigen, Protein, everything()) %>%
rename(PlasmoDB_Accession = Protein) %>%
mutate(overlapping_CIs = ifelse(
(halflife_days_highCI_longley_recalc < halflife_days_lowCI) |
(halflife_days_highCI < halflife_days_lowCI_longley_recalc) ,
"no", "yes"))
lmer_adj_res_longley_recalc %>%
filter(overlapping_CIs == "yes") %>%
select(Antigen)
## Antigen
## 1 Pv12
## 2 Pv41
## 3 PVX_081550
## 4 PvRBP2b
lmer_adj_res_longley_recalc %>%
filter(overlapping_CIs == "no") %>%
select(Antigen)
## Antigen
## 1 PvMSP3a
## 2 PvGAMA
## 3 PvDBP-FL
#https://stackoverflow.com/questions/58657802/forest-plot-with-subgroups-in-ggplot2
df_Tenshi = data.frame(Antigen = lmer_adj_res_longley_recalc$Antigen,
Days = lmer_adj_res_longley_recalc$halflife_days,
LCI = lmer_adj_res_longley_recalc$halflife_days_lowCI,
UCI = lmer_adj_res_longley_recalc$halflife_days_highCI,
Study = rep("current study", nrow(lmer_adj_res_longley_recalc)))
df_Longley = data.frame(Antigen = lmer_adj_res_longley_recalc$Antigen,
Days = lmer_adj_res_longley_recalc$halflife_days_longley_recalc,
LCI = lmer_adj_res_longley_recalc$halflife_days_lowCI_longley_recalc,
UCI = lmer_adj_res_longley_recalc$halflife_days_highCI_longley_recalc,
Study = rep("Longley et al. PLoS NTD 2017 re-analysis", nrow(lmer_adj_res_longley_recalc)))
df_Longley_og = data.frame(Antigen = longley_dat_excel$Antigen,
Days = longley_dat_excel$halflife_days_Longley,
LCI = longley_dat_excel$Longley_lowCI,
UCI = as.numeric(longley_dat_excel$Longley_highCI),
Study = rep("Longley et al. PLoS NTD 2017 original analysis", nrow(longley_dat_excel)))
df_all <- rbind(df_Tenshi, df_Longley, df_Longley_og) %>%
mutate(Antigen = factor(Antigen, levels = c("PvMSP3a",
"PvGAMA",
"Pv12",
"PvDBP-FL",
"Pv41",
"PvEBP",
"PVX_081550",
"PvRBP2b",
"PvRBP1a"))) %>%
mutate(Study = factor(Study, levels = c("Longley et al. PLoS NTD 2017 original analysis",
"Longley et al. PLoS NTD 2017 re-analysis",
"current study"))) %>%
drop_na() %>%
filter(Antigen != "PvEBP") %>%
filter(Antigen != "PvRBP1a")
CI_plots <- ggplot(df_all, aes(x=fct_reorder(Antigen, LCI, min, .desc = FALSE), y=Days,
ymin=LCI, ymax=UCI, col=Study, fill=Study)) + #specify position here
geom_linerange(size=2.5,position=position_dodge(width = 0.8), alpha = 0.6) +
geom_point(size=1.5, shape=21, colour="white", stroke = 0.25,position=position_dodge(width = 0.8)) +
scale_fill_brewer(palette = "Set2")+
scale_color_brewer(palette = "Set2")+
scale_x_discrete(name="Antigen") +
scale_y_continuous(name="T1/2 (days)", n.breaks = 8) +
coord_flip() +
theme_minimal() +
theme(legend.position = "right")
CI_plots